First look at the seven variables I chose in terms of box-and-jitter plots:
Autocorrelation function:
library(pastecs)
corrFiles <- list.files("/Volumes/PhD/BijelData/Batch_II_full_resolution_ED_images/Autocorrelation", pattern=".txt", full.names = TRUE)
corrFileNames <- list.files("/Volumes/PhD/BijelData/Batch_II_full_resolution_ED_images/Autocorrelation", pattern=".txt")
autoCorr <- do.call(cbind, lapply(corrFiles, read.csv, header=FALSE))
colnames(autoCorr) <- corrFileNames
exp_Data <- read.csv("/Volumes/PhD/BijelData/Bijel_Data_Cleaner_ToRead.csv", na.strings = "?")
exp_Data$Sample.Number <- as.character(exp_Data$Sample.Number)
corrFileID <- sapply(strsplit(corrFileNames,"_"), `[`,1) #`[` is a function that takes the subset of x, the input to this function is x (strsplit...) and the element of x that I want, ie the 1st one
colnames(autoCorr) <- corrFileID
rownames(exp_Data) <- exp_Data$Sample.Number
autoCorr_transpose <- data.frame(t(autoCorr))
exp_Data$Autocorrelation <- autoCorr_transpose[match(row.names(exp_Data),row.names(autoCorr_transpose)),c(1:256)]
turningPoints <- lapply(1:135, function(y) turnpoints(unlist(exp_Data$Autocorrelation[y,])))
exp_Data$Auto.Turning.Points <- turningPoints
firstTurn <- lapply(1:135, function(y) exp_Data$Auto.Turning.Points[[y]]$tppos[[1]])
exp_Data$Auto.First.Turn <- unlist(firstTurn)
peakDepth <- lapply(1:135, function(y) exp_Data$Autocorrelation[[y,exp_Data$Auto.Turning.Points[[y]]$tppos[[2]]]]-exp_Data$Autocorrelation[[y,exp_Data$Auto.Turning.Points[[y]]$tppos[[1]]]])
exp_Data$Auto.Peak.Depth <- unlist(peakDepth)
exp_Data$Auto.Num.Turns <- unlist(lapply(1:135, function(y) length(exp_Data$Auto.Turning.Points[[y]]$tppos)))
Radial profile:
radFiles <- list.files("/Volumes/PhD/BijelData/Batch_II_full_resolution_ED_images/Profiles", pattern=".txt", full.names = TRUE)
radFileNames <- list.files("/Volumes/PhD/BijelData/Batch_II_full_resolution_ED_images/Profiles", pattern=".txt")
radProf_data <- do.call(cbind, lapply(radFiles, read.csv, header=FALSE))
colnames(radProf_data) <- radFileNames
radFileID <- sapply(strsplit(radFileNames,"_"), `[`,1) #`[` is a function that takes the subset of x, the input to this function is x (strsplit...) and the element of x that I want, ie the 1st one
colnames(radProf_data) <- radFileID
rownames(exp_Data) <- exp_Data$Sample.Number
radProf_data_transpose <- data.frame(t(radProf_data))
exp_Data$Radial.Profile <- radProf_data_transpose[match(row.names(exp_Data),row.names(radProf_data_transpose)),c(1:256)]
r <- c(1:256)
y <- exp_Data$Radial.Profile[2:20]
lineFits <- lapply(1:135, function(n) lm(unlist(y[n,]) ~ r[2:20]))
lineCoeffs <- lapply(lineFits, function(m) m$coefficients)
lineGradients <- lapply (1:135, function(p) unname(lineCoeffs[[p]][2]))
exp_Data$Rad.Line.Gradients <- unlist(lineGradients)
avVal <- lapply(1:135, function(n) mean(unlist(exp_Data$Radial.Profile[n,29:31])))
exp_Data$Rad.Val.30 <- unlist(avVal)
jumps <- function(y,n){
a <- exp_Data$Radial.Profile[[y,n+5]]
b <- exp_Data$Radial.Profile[[y,n]]
out <- a-b
return(out)
}
#nMax <- lapply(1:135, function(y) length(exp_Data$Radial.Profile[[y]])-1)
jumpSizes <- lapply(1:135, function(y) mapply(jumps, y, 1:251))
maxJump <- lapply(1:135, function(y) max(jumpSizes[[y]]))
exp_Data$Max.Rad.Jump <- unlist(maxJump)
maxJumpX <- lapply(1:135, function(y) which.max(jumpSizes[[y]]))
exp_Data$Max.Rad.Jump.X <- unlist(maxJumpX)
Plot them all.
library(ggplot2)
png(file='/Users/s1101153/Desktop/bijels1_graphs/bjp_auto-num-turns.png', width=600, height=400)
ggplot(exp_Data, aes(x=as.factor(Bijel), y=Auto.Num.Turns, fill=Bijel)) + geom_boxplot(alpha=0.3) + geom_jitter(alpha=0.5) + xlab("Bijel?") + ylab("Number of turning points in ACF")+theme(legend.position="none", text = element_text(size=24), axis.title = element_text(size=21))
dev.off()
null device
1
png(file='/Users/s1101153/Desktop/bijels1_graphs/bjp_auto-peak-depth.png', width=600, height=400)
ggplot(exp_Data, aes(x=as.factor(Bijel), y=Auto.Peak.Depth, fill=Bijel)) + geom_boxplot(alpha=0.3) + geom_jitter(alpha=0.5) + xlab("Bijel?") + ylab("Depth of first ACF trough/peak pair")+theme(legend.position="none", text = element_text(size=24), axis.title = element_text(size=21))
dev.off()
null device
1
png(file='/Users/s1101153/Desktop/bijels1_graphs/bjp_auto-first-turn.png', width=600, height=400)
ggplot(exp_Data, aes(x=as.factor(Bijel), y=Auto.First.Turn, fill=Bijel)) + geom_boxplot(alpha=0.3) + geom_jitter(alpha=0.5) +
xlab("Bijel?") + ylab("Position of first ACF turning point")+theme(legend.position="none", text = element_text(size=24), axis.title = element_text(size=21))
dev.off()
null device
1
png(file='/Users/s1101153/Desktop/bijels1_graphs/bjp_rad-val-30.png', width=600, height=400)
ggplot(exp_Data, aes(x=as.factor(Bijel), y=Rad.Val.30, fill=Bijel)) + geom_boxplot(alpha=0.3) + geom_jitter(alpha=0.5) + xlab("Bijel?") + ylab("Average of points 29-31 of SF")+theme(legend.position="none", text = element_text(size=24), axis.title = element_text(size=21))
dev.off()
null device
1
png(file='/Users/s1101153/Desktop/bijels1_graphs/bjp_rad-line-gradients.png', width=600, height=400)
ggplot(exp_Data, aes(x=as.factor(Bijel), y=Rad.Line.Gradients, fill=Bijel)) + geom_boxplot(alpha=0.3) + geom_jitter(alpha=0.5) + xlab("Bijel?") + ylab("Gradient of first 20 points of SF")+theme(legend.position="none", text = element_text(size=24), axis.title = element_text(size=21))
dev.off()
null device
1
png(file='/Users/s1101153/Desktop/bijels1_graphs/bjp_rad-max-jump.png', width=600, height=400)
ggplot(exp_Data, aes(x=as.factor(Bijel), y=Max.Rad.Jump, fill=Bijel)) + geom_boxplot(alpha=0.3) + geom_jitter(alpha=0.5) + xlab("Bijel?") + ylab("Height of biggest positive jump in SF")+theme(legend.position="none", text = element_text(size=24), axis.title = element_text(size=21))
dev.off()
null device
1
png(file='/Users/s1101153/Desktop/bijels1_graphs/bjp_rad-max-jump-x.png', width=600, height=400)
ggplot(exp_Data, aes(x=as.factor(Bijel), y=Max.Rad.Jump.X, fill=Bijel)) + geom_boxplot(alpha=0.3) + geom_jitter(alpha=0.5) + xlab("Bijel?") + ylab("Position (in r) of biggest jump in SF")+theme(legend.position="none", text = element_text(size=24), axis.title = element_text(size=21))
dev.off()
null device
1
Plot examples of the ACF and SF too:
x=c(1:255)
xSF=x*640.17/512
L <- 640.17
pixel_to_micron <- 2*pi/L
#example SF plots (liquid channel)
bijelSF_l <- unlist(read.csv("/Volumes/PhD/BijelData/SF_bothChannels/52ii_Image23.tif_radProf_channel0.txt"))
noBijelSF_l <- unlist(read.csv("/Volumes/PhD/BijelData/SF_bothChannels/54i_Image9.tif_radProf_channel0.txt"))
png('~/Desktop/bijels1_graphs/SFexamples_liq.png', res=300, width=1800, height=1200)
plot(xSF, bijelSF_l, type="l", lwd=2, col="red", xlab = "q (1/μm)", ylab="Structure Factor", ylim=c(0.4,2.2),cex.axis=1.5,cex.lab=2,mgp=c(2.2,0.7,0))
lines(xSF, noBijelSF_l, lwd=2)
legend("topright", legend = c("Bijel", "Non-bijel"), col=c("red", "black"), lwd=2)#, cex=1.4)
dev.off()
null device
1
#example ACF plots (liquid channel)
bijelACF_l <- unlist(read.csv("/Volumes/PhD/BijelData/LiquidChannel/autoCorr/52ii_Image23.tif_autoCorr_channel0.txt"))
noBijelACF_l <- unlist(read.csv("/Volumes/PhD/BijelData/LiquidChannel/autoCorr/54i_Image9.tif_autoCorr_channel0.txt"))
x=c(1:255)
xACF=x*pixel_to_micron
png('~/Desktop/bijels1_graphs/ACFexamples_liq.png', res=300, width=1800, height=1200)
plot(xACF, bijelACF_l, type="l", lwd=2, col="red", xlab = "r (μm)", ylab="Autocorrelation Function",cex.axis=1.5,cex.lab=2,mgp=c(2.2,0.7,0))
lines(xACF, noBijelACF_l, lwd=2)
legend("topright", legend = c("Bijel", "Non-bijel"), col=c("red", "black"), lwd=2)
dev.off()
null device
1
Now scale the data for accurate results:
attach(exp_Data)
The following object is masked from nbDat (pos = 3):
Bijel
The following object is masked from bDat (pos = 4):
Bijel
The following object is masked from bDat (pos = 5):
Bijel
The following object is masked from nbDat (pos = 6):
Bijel
The following object is masked from bDat (pos = 7):
Bijel
The following objects are masked from exp_Data (pos = 8):
Ammonia..g, Bijel, Bubbles, Date.Made, Droplets, Ethanediol, Excess.Particles, HMDS..g, HMDS.Ratio, Nitromethane,
Nitromethane.mass.fraction, Particle.Batch, Particle.Volume.Fraction..., Particles, Quench.Method, Sample.Number
The following objects are masked from exp_Data (pos = 9):
Ammonia..g, Bijel, Bubbles, Date.Made, Droplets, Ethanediol, Excess.Particles, HMDS..g, HMDS.Ratio, Nitromethane,
Nitromethane.mass.fraction, Particle.Batch, Particle.Volume.Fraction..., Particles, Quench.Method, Sample.Number
The following object is masked from nbDat (pos = 10):
Bijel
The following object is masked from bDat (pos = 11):
Bijel
The following object is masked from nbDat (pos = 12):
Bijel
The following object is masked from bDat (pos = 13):
Bijel
The following objects are masked from exp_Data (pos = 14):
Ammonia..g, Bijel, Bubbles, Date.Made, Droplets, Ethanediol, Excess.Particles, HMDS..g, HMDS.Ratio, Nitromethane,
Nitromethane.mass.fraction, Particle.Batch, Particle.Volume.Fraction..., Particles, Quench.Method, Sample.Number
The following objects are masked from exp_Data (pos = 15):
Ammonia..g, Bijel, Bubbles, Date.Made, Droplets, Ethanediol, Excess.Particles, HMDS..g, HMDS.Ratio, Nitromethane,
Nitromethane.mass.fraction, Particle.Batch, Particle.Volume.Fraction..., Particles, Quench.Method, Sample.Number
The following object is masked from nbDat (pos = 16):
Bijel
The following object is masked from bDat (pos = 17):
Bijel
The following objects are masked from exp_Data (pos = 18):
Ammonia..g, Bijel, Bubbles, Date.Made, Droplets, Ethanediol, Excess.Particles, HMDS..g, HMDS.Ratio, Nitromethane,
Nitromethane.mass.fraction, Particle.Batch, Particle.Volume.Fraction..., Particles, Quench.Method, Sample.Number
The following objects are masked from nbDat (pos = 19):
Auto.First.Turn, Bijel
The following objects are masked from bDat (pos = 20):
Auto.First.Turn, Bijel
The following objects are masked from nbDat (pos = 21):
Auto.First.Turn, Bijel
The following objects are masked from bDat (pos = 22):
Auto.First.Turn, Bijel
The following object is masked from nbDat (pos = 23):
Bijel
The following object is masked from bDat (pos = 24):
Bijel
The following object is masked from nbDat (pos = 25):
Bijel
The following object is masked from bDat (pos = 26):
Bijel
The following objects are masked from exp_Data (pos = 32):
Ammonia..g, Auto.First.Turn, Auto.Num.Turns, Auto.Peak.Depth, Auto.Turning.Points, Autocorrelation, Bijel, Bubbles,
Date.Made, Droplets, Ethanediol, Excess.Particles, HMDS..g, HMDS.Ratio, Max.Rad.Jump, Max.Rad.Jump.X, Nitromethane,
Nitromethane.mass.fraction, Particle.Batch, Particle.Volume.Fraction..., Particles, Quench.Method,
Rad.Line.Gradients, Rad.Val.30, Radial.Profile, Sample.Number
dat=data.frame(Auto.Num.Turns, Auto.Peak.Depth, Rad.Val.30, Rad.Line.Gradients, Max.Rad.Jump, Max.Rad.Jump.X, Auto.First.Turn)
means = unlist(lapply(1:7, function(n) mean(unlist(dat[n][]))))
datScaled = data.frame(lapply(1:7, function(n) dat[n][]*means[1]/means[n]))
datScaled$Bijel = Bijel
head(datScaled)
Now run decision tree (ignore this for now…)
library(caret)
library(rpart.plot)
set.seed(1234)
trCtrl <- trainControl(method="repeatedcv", number=10, repeats = 3)
dtreeFit <- train(Bijel ~., data=datScaled, method="rpart", parms=list(split="gini"), trControl=trCtrl, tuneLength=10)
dtreeFit
CART
135 samples
7 predictor
2 classes: 'n', 'y'
No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times)
Summary of sample sizes: 122, 122, 122, 121, 122, 121, ...
Resampling results across tuning parameters:
cp Accuracy Kappa
0.00000000 0.7912210 0.5187700
0.05426357 0.8372405 0.5757128
0.10852713 0.8372405 0.5757128
0.16279070 0.8372405 0.5757128
0.21705426 0.8372405 0.5757128
0.27131783 0.8372405 0.5757128
0.32558140 0.8372405 0.5757128
0.37984496 0.8372405 0.5757128
0.43410853 0.8372405 0.5757128
0.48837209 0.7065812 0.1199017
Accuracy was used to select the optimal model using the largest value.
The final value used for the model was cp = 0.4341085.
prp(dtreeFit$finalModel, box.palette = "Reds", tweak=1.2, extra=101)
# table(Predict=dtreeFit$pred(datScaled), true=Bijel)
Now knn:
#knn - all 7 variables
set.seed(1234)
knn_fit <- train(Bijel ~., data=datScaled, method="knn", trControl=trCtrl, tuneLength=42)
error=1-knn_fit$results[row.names(knn_fit$bestTune),]$Accuracy
error
[1] 0.2149573
knn_fit
k-Nearest Neighbors
135 samples
7 predictor
2 classes: 'n', 'y'
No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times)
Summary of sample sizes: 122, 122, 122, 121, 122, 121, ...
Resampling results across tuning parameters:
k Accuracy Kappa
5 0.7305739 0.35631464
7 0.7303907 0.34922530
9 0.7329792 0.35084850
11 0.7628083 0.40815297
13 0.7720147 0.42145392
15 0.7771429 0.44347609
17 0.7778266 0.45160919
19 0.7753114 0.44145188
21 0.7725641 0.45235690
23 0.7554823 0.41454809
25 0.7510867 0.41227897
27 0.7487057 0.41254685
29 0.7611600 0.45148930
31 0.7611600 0.45148930
33 0.7611600 0.45148930
35 0.7637241 0.45555376
37 0.7637241 0.45555376
39 0.7611600 0.44818781
41 0.7609768 0.44373489
43 0.7582295 0.43255068
45 0.7534676 0.42317590
47 0.7534676 0.42058487
49 0.7556899 0.42414218
51 0.7556899 0.42246529
53 0.7630403 0.43007752
55 0.7850427 0.46611045
57 0.7706838 0.41692672
59 0.7661050 0.36673823
61 0.7263126 0.18689266
63 0.6891331 0.03281686
65 0.6819902 0.00000000
67 0.6819902 0.00000000
69 0.6819902 0.00000000
71 0.6819902 0.00000000
73 0.6819902 0.00000000
75 0.6819902 0.00000000
77 0.6819902 0.00000000
79 0.6819902 0.00000000
81 0.6819902 0.00000000
83 0.6819902 0.00000000
85 0.6819902 0.00000000
87 0.6819902 0.00000000
Accuracy was used to select the optimal model using the largest value.
The final value used for the model was k = 55.
plot(knn_fit)
pairs(datScaled[-8], col=Bijel, main = "Data")
bijelPred <- predict(knn_fit, datScaled)
pairs(datScaled[-8], col=bijelPred, main="Predicted")
#Auto.Num.Turns removed
set.seed(1234)
knn_fit <- train(Bijel ~., data=datScaled[-1], method="knn", trControl=trCtrl, tuneLength=42)
error=1-knn_fit$results[row.names(knn_fit$bestTune),]$Accuracy
error
[1] 0.2149817
plot(knn_fit)
pairs(datScaled[2:7], col=Bijel, main = "Data")
bijelPred <- predict(knn_fit, datScaled[-1])
pairs(datScaled[2:7], col=bijelPred, main="Predicted")
#Auto.Peak.Depth removed
set.seed(1234)
knn_fit <- train(Bijel ~., data=datScaled[-2], method="knn", trControl=trCtrl, tuneLength=42)
error=1-knn_fit$results[row.names(knn_fit$bestTune),]$Accuracy
error
[1] 0.2146398
plot(knn_fit)
pairs(datScaled[c(1,3:7)], col=Bijel, main = "Data")
bijelPred <- predict(knn_fit, datScaled[-2])
pairs(datScaled[c(1,3:7)], col=bijelPred, main="Predicted")
#Rad.Val.30 removed
set.seed(1234)
knn_fit <- train(Bijel ~., data=datScaled[-3], method="knn", trControl=trCtrl, tuneLength=42)
error=1-knn_fit$results[row.names(knn_fit$bestTune),]$Accuracy
error
[1] 0.2149573
plot(knn_fit)
pairs(datScaled[c(1:2,4:7)], col=Bijel, main = "Data")
bijelPred <- predict(knn_fit, datScaled[-3])
pairs(datScaled[c(1:2,4:7)], col=bijelPred, main="Predicted")
#Rad.Line.Gradients removed
set.seed(1234)
knn_fit <- train(Bijel ~., data=datScaled[-4], method="knn", trControl=trCtrl, tuneLength=42)
error=1-knn_fit$results[row.names(knn_fit$bestTune),]$Accuracy
error
[1] 0.2175214
plot(knn_fit)
pairs(datScaled[c(1:3,5:7)], col=Bijel, main = "Data")
bijelPred <- predict(knn_fit, datScaled[-4])
pairs(datScaled[c(1:3,5:7)], col=bijelPred, main="Predicted")
#Max.Rad.Jump removed
set.seed(1234)
knn_fit <- train(Bijel ~., data=datScaled[-5], method="knn", trControl=trCtrl, tuneLength=42)
error=1-knn_fit$results[row.names(knn_fit$bestTune),]$Accuracy
error
[1] 0.2117338
plot(knn_fit)
pairs(datScaled[c(1:4,6:7)], col=Bijel, main = "Data")
bijelPred <- predict(knn_fit, datScaled[-5])
pairs(datScaled[c(1:4,6:7)], col=bijelPred, main="Predicted")
#Max.Rad.Jump.X removed
set.seed(1234)
knn_fit <- train(Bijel ~., data=datScaled[-6], method="knn", trControl=trCtrl, tuneLength=42)
error=1-knn_fit$results[row.names(knn_fit$bestTune),]$Accuracy
error
[1] 0.1900488
plot(knn_fit)
pairs(datScaled[c(1:5,7)], col=Bijel, main = "Data")
bijelPred <- predict(knn_fit, datScaled[-6])
pairs(datScaled[c(1:5,7)], col=bijelPred, main="Predicted")
#Auto.First.Turns removed
set.seed(1234)
knn_fit <- train(Bijel ~., data=datScaled[-7], method="knn", trControl=trCtrl, tuneLength=42)
error=1-knn_fit$results[row.names(knn_fit$bestTune),]$Accuracy
error
[1] 0.2439683
plot(knn_fit)
pairs(datScaled[c(1:7)], col=Bijel, main = "Data")
bijelPred <- predict(knn_fit, datScaled[-7])
pairs(datScaled[c(1:7)], col=bijelPred, main="Predicted")
Now remove all the variables whose removal makes the error lower than the original value of 0.2149573:
datImproved = datScaled[-6]
datImprovedSmall = datImproved[-5]
vars <- names(datImproved)
var_fstat <- sapply(vars, function(x) {
summary(lm(as.formula(paste(x, " ~ Bijel")), data = datImproved))$fstatistic[1]
})
using type = "numeric" with a factor response will be ignoredthe response appeared on the right-hand side and was droppedproblem with term 1 in model.matrix: no columns are assigned‘-’ not meaningful for factors‘^’ not meaningful for factors
sort(unlist(var_fstat), decreasing=TRUE)
Auto.First.Turn.value Rad.Line.Gradients.value Rad.Val.30.value Max.Rad.Jump.value Auto.Peak.Depth.value
61.57758959 49.81943975 27.27489050 13.70016954 12.21510388
Auto.Num.Turns.value
0.09194059
vars <- names(datImprovedSmall)
var_fstat <- sapply(vars, function(x) {
summary(lm(as.formula(paste(x, " ~ Bijel")), data = datImprovedSmall))$fstatistic[1]
})
using type = "numeric" with a factor response will be ignoredthe response appeared on the right-hand side and was droppedproblem with term 1 in model.matrix: no columns are assigned‘-’ not meaningful for factors‘^’ not meaningful for factors
sort(unlist(var_fstat), decreasing=TRUE)
Auto.First.Turn.value Rad.Line.Gradients.value Rad.Val.30.value Auto.Peak.Depth.value Auto.Num.Turns.value
61.57758959 49.81943975 27.27489050 12.21510388 0.09194059
stick with removing the two variables, i.e. use datImprovedSmall
errors <- rep(NA, 5)
set.seed(1234)
knn_fit_turn5 <- train(Bijel ~., data=datImprovedSmall, method="knn", trControl=trCtrl, tuneLength=42)
errors[1]=1-knn_fit_turn5$results[row.names(knn_fit_turn5$bestTune),]$Accuracy
#plot(knn_fit_turn)
set.seed(1234)
knn_fit_turn4 <- train(Bijel ~., data=datImprovedSmall[c("Bijel", "Auto.First.Turn", "Rad.Line.Gradients", "Rad.Val.30", "Auto.Peak.Depth")], method="knn", trControl=trCtrl, tuneLength=42)
errors[2]=1-knn_fit_turn4$results[row.names(knn_fit_turn4$bestTune),]$Accuracy
set.seed(1234)
knn_fit_turn3 <- train(Bijel ~., data=datImprovedSmall[c("Bijel", "Auto.First.Turn", "Rad.Line.Gradients", "Rad.Val.30")], method="knn", trControl=trCtrl, tuneLength=42)
errors[3]=1-knn_fit_turn3$results[row.names(knn_fit_turn3$bestTune),]$Accuracy
set.seed(1234)
knn_fit_turn2 <- train(Bijel ~., data=datImprovedSmall[c("Bijel", "Auto.First.Turn", "Rad.Line.Gradients")], method="knn", trControl=trCtrl, tuneLength=42)
errors[4]=1-knn_fit_turn2$results[row.names(knn_fit_turn2$bestTune),]$Accuracy
set.seed(1234)
knn_fit_turn1 <- train(Bijel ~., data=datImprovedSmall[c("Bijel", "Auto.First.Turn")], method="knn", trControl=trCtrl, tuneLength=42)
errors[5]=1-knn_fit_turn1$results[row.names(knn_fit_turn1$bestTune),]$Accuracy
Plot the results
errors
[1] 0.1775946 0.1869841 0.1774359 0.1774359 0.1639927
png(file='/Users/s1101153/Desktop/bijels1_graphs/error_vs_num_vars.png', width=600, height=400)
plot(c(5,4,3,2,1), errors, xlab="Number of variables in model", ylab="Classification error", pch=19, ylim=c(.15, .2))
dev.off()
null device
1
pred2 <- predict(knn_fit_turn2, data=datImprovedSmall[c("Bijel", "Auto.First.Turn", "Rad.Line.Gradients")])
png(file='/Users/s1101153/Desktop/bijels1_graphs/liquid_2vars.png', width=600, height=400)
plot(Auto.First.Turn, Rad.Line.Gradients, col=Bijel, pch=16)
points(Auto.First.Turn, Rad.Line.Gradients, col=pred2, pch=1, cex=1.5)
dev.off()
null device
1
pred1 <- predict(knn_fit_turn1, data=datImprovedSmall[c("Bijel", "Auto.First.Turn")])
set.seed(1234)
y=jitter(rep(0, each=135))
png(file='/Users/s1101153/Desktop/bijels1_graphs/liquid_result.png', width=600, height=400)
plot(y~Auto.First.Turn, col=Bijel, pch=16, yaxt='n', xlab="Position of first turning point in Liquid Channel ACF (μm)", ylab="", log="x")
points(y~Auto.First.Turn, col=pred1, pch=1, cex=1.5)
legend("topright", legend=c("Bijel", "Non-bijel", "Pred. bijel", "Pred. non-bijel"), pch=c(16,16,1,1), col=c("red", "black", "red", "black"))
dev.off()
null device
1
table()
Error in table() : nothing to tabulate
final_model <- train(Bijel ~., data=datImprovedSmall[c("Bijel", "Auto.First.Turn")], method="knn", trControl=trCtrl, tuneLength=42)
errors[5]=1-knn_fit_turn1$results[row.names(knn_fit_turn1$bestTune),]$Accuracy
png(file='/Users/s1101153/Desktop/bijels1_graphs/liquid_result_k_graph.png', width=600, height=400)
plot(final_model)
dev.off()
null device
1
pred_final <- predict(final_model, data=datImprovedSmall[c("Bijel", "Auto.First.Turn")])
pred_final
[1] y y y y n y y y y y n y n y y y y y y n y y n y y y y y n y n y n y y y y y y n y n n y y y y y y y y y y y y y y y y n y y y y
[65] y y y y n n y n y n y n y y y y n y n y y y y y y y y y y y y y y y y y y y n n y y y y y y y y y y y y n y y n y y y n y y n n
[129] y n y y y y y
Levels: n y
table(Predict=pred_final, true=Bijel)
true
Predict n y
n 24 3
y 19 89
First read in the new files (from sameSample_analysis.R in Git folder):
bijelFilesL <- list.files("/Volumes/PhD/BijelData/sample52ii/liquid", pattern=".txt", full.names = TRUE)
bijelFileNames <- list.files("/Volumes/PhD/BijelData/sample52ii/liquid", pattern=".txt")
failFilesL <- list.files("/Volumes/PhD/BijelData/sample54i/liquid", pattern=".txt", full.names = TRUE)
failFileNames <- list.files("/Volumes/PhD/BijelData/sample54i/liquid", pattern=".txt")
autoCorrBL <- do.call(cbind, lapply(bijelFilesL, read.csv, header=FALSE))
bijelID <- sapply(strsplit(bijelFileNames,"_"), `[`,1)
bijelLabs <- rep_len("y", length(bijelFileNames))
bDat <- data.frame(Bijel=bijelLabs)
rownames(bDat) <- bijelID
bDat$Liq <- data.frame(t(autoCorrBL))
autoCorrNBL <- do.call(cbind, lapply(failFilesL, read.csv, header=FALSE))
failID <- sapply(strsplit(failFileNames,"_"), `[`,1)
#colnames(autoCorrNBL) <- colnames(autoCorrNBP) <- failID
failLabs <- rep_len("n", length(failFileNames))
nbDat <- data.frame(Bijel=failLabs)
nbDat$Liq <- data.frame(t(autoCorrNBL))
lTurnsB <- lapply(1:8, function(n) turnpoints(unlist(bDat$Liq[n,])))
firstTurnB <- lapply(1:8, function(m) lTurnsB[[m]]$tppos[1])
bDat$Auto.First.Turn <- unlist(firstTurnB)
lTurnsNB <- lapply(1:11, function(n) turnpoints(unlist(nbDat$Liq[n,])))
value out of range in 'gammafn'
firstTurnNB <- lapply(1:11, function(m) lTurnsNB[[m]]$tppos[1])
nbDat$Auto.First.Turn <- unlist(firstTurnNB)
attach(bDat)
The following objects are masked from nbDat (pos = 3):
Auto.First.Turn, Bijel, Liq
The following objects are masked from bDat (pos = 4):
Auto.First.Turn, Bijel, Liq
The following objects are masked from nbDat (pos = 5):
Auto.First.Turn, Bijel, Liq
The following objects are masked from bDat (pos = 6):
Auto.First.Turn, Bijel, Liq
The following objects are masked from exp_Data (pos = 7):
Auto.First.Turn, Bijel
The following objects are masked from nbDat (pos = 8):
Bijel, Liq
The following objects are masked from bDat (pos = 9):
Bijel, Liq
The following objects are masked from bDat (pos = 10):
Bijel, Liq
The following objects are masked from nbDat (pos = 11):
Bijel, Liq
The following objects are masked from bDat (pos = 12):
Bijel, Liq
The following object is masked from exp_Data (pos = 13):
Bijel
The following object is masked from exp_Data (pos = 14):
Bijel
The following objects are masked from nbDat (pos = 15):
Bijel, Liq
The following objects are masked from bDat (pos = 16):
Bijel, Liq
The following objects are masked from nbDat (pos = 17):
Bijel, Liq
The following objects are masked from bDat (pos = 18):
Bijel, Liq
The following object is masked from exp_Data (pos = 19):
Bijel
The following object is masked from exp_Data (pos = 20):
Bijel
The following objects are masked from nbDat (pos = 21):
Bijel, Liq
The following objects are masked from bDat (pos = 22):
Bijel, Liq
The following object is masked from exp_Data (pos = 23):
Bijel
The following objects are masked from nbDat (pos = 24):
Auto.First.Turn, Bijel, Liq
The following objects are masked from bDat (pos = 25):
Auto.First.Turn, Bijel, Liq
The following objects are masked from nbDat (pos = 26):
Auto.First.Turn, Bijel, Liq
The following objects are masked from bDat (pos = 27):
Auto.First.Turn, Bijel, Liq
The following objects are masked from nbDat (pos = 28):
Bijel, Liq
The following objects are masked from bDat (pos = 29):
Bijel, Liq
The following objects are masked from nbDat (pos = 30):
Bijel, Liq
The following objects are masked from bDat (pos = 31):
Bijel, Liq
The following objects are masked from exp_Data (pos = 37):
Auto.First.Turn, Bijel
testDat1 <- data.frame(Bijel, Auto.First.Turn)
attach(nbDat)
The following objects are masked from bDat (pos = 3):
Auto.First.Turn, Bijel, Liq
The following objects are masked from nbDat (pos = 4):
Auto.First.Turn, Bijel, Liq
The following objects are masked from bDat (pos = 5):
Auto.First.Turn, Bijel, Liq
The following objects are masked from nbDat (pos = 6):
Auto.First.Turn, Bijel, Liq
The following objects are masked from bDat (pos = 7):
Auto.First.Turn, Bijel, Liq
The following objects are masked from exp_Data (pos = 8):
Auto.First.Turn, Bijel
The following objects are masked from nbDat (pos = 9):
Bijel, Liq
The following objects are masked from bDat (pos = 10):
Bijel, Liq
The following objects are masked from bDat (pos = 11):
Bijel, Liq
The following objects are masked from nbDat (pos = 12):
Bijel, Liq
The following objects are masked from bDat (pos = 13):
Bijel, Liq
The following object is masked from exp_Data (pos = 14):
Bijel
The following object is masked from exp_Data (pos = 15):
Bijel
The following objects are masked from nbDat (pos = 16):
Bijel, Liq
The following objects are masked from bDat (pos = 17):
Bijel, Liq
The following objects are masked from nbDat (pos = 18):
Bijel, Liq
The following objects are masked from bDat (pos = 19):
Bijel, Liq
The following object is masked from exp_Data (pos = 20):
Bijel
The following object is masked from exp_Data (pos = 21):
Bijel
The following objects are masked from nbDat (pos = 22):
Bijel, Liq
The following objects are masked from bDat (pos = 23):
Bijel, Liq
The following object is masked from exp_Data (pos = 24):
Bijel
The following objects are masked from nbDat (pos = 25):
Auto.First.Turn, Bijel, Liq
The following objects are masked from bDat (pos = 26):
Auto.First.Turn, Bijel, Liq
The following objects are masked from nbDat (pos = 27):
Auto.First.Turn, Bijel, Liq
The following objects are masked from bDat (pos = 28):
Auto.First.Turn, Bijel, Liq
The following objects are masked from nbDat (pos = 29):
Bijel, Liq
The following objects are masked from bDat (pos = 30):
Bijel, Liq
The following objects are masked from nbDat (pos = 31):
Bijel, Liq
The following objects are masked from bDat (pos = 32):
Bijel, Liq
The following objects are masked from exp_Data (pos = 38):
Auto.First.Turn, Bijel
testDat2 <- data.frame(Bijel, Auto.First.Turn)
testDat <- rbind(testDat1, testDat2)
testDat
Now apply the final model to the new data:
bijel_pred = predict(final_model, newdata = testDat)
bijel_true = testDat$Bijel
length(bijel_pred)
[1] 19
length(bijel_true)
[1] 19
success_count=length(bijel_pred[bijel_pred==bijel_true])
success_count
[1] 12
success_rate=success_count/length(bijel_pred)
paste0("Success rate: ",100*success_rate,"%")
[1] "Success rate: 63.1578947368421%"
null_rate = 1-(length(bijel_true[bijel_true=='y'])/length(bijel_pred))
paste0("Null rate: ", 100*null_rate, "%")
[1] "Null rate: 57.8947368421053%"
table(Predict=bijel_pred, true=bijel_true)
true
Predict y n
n 0 4
y 8 7
print(data.frame(bijel_pred, bijel_true=bijel_true))